/************************************************************************************
 * program: char2fmt.sas
 * Programmer: Dan Blanchette   dan_blanchette@unc.edu
 *             The Carolina Population Center at The University of North Carolina at Chapel Hill
 * Developed at:
 *             The Carolina Population Center at The University of North Carolina at Chapel Hill,
 *             Research Computing, UNC-CH, and
 *             Center for Entrepreneurship and Innovation Duke University's Fuqua School of Business
 * date: 20Aug2005
 ** Modified: 09Mar2009 - renamed file to char2fmt.sas
 *           - added varlist option
 ** Modified: 18Mar2008 - set local macro vars to be local.
 *           - made &dset. be the syslast dataset.
 *           - variable labels were not being kept for new numeric vars so
 *              that is now fixed.  quotes and macro vars inside new var labels 
 *              are also okay
 *           - fixed test of invalid formats since the varname may not always
 *              be in lowercase the test now does: lowcase(name).
 ** Modified: 26Sep2007 - fixed &sysver to be &isysver.  since &sysver almost
 *            always contains a decimal value it gets treated as text which SAS
 *            always considers to be greater than any number.
 *
 * Disclaimer:  This program is free to use and to distribute as long as credit is given to
 *                Dan Blanchette
 *                The Carolina Population Center 
 *                University of North Carolina at Chapel Hill
 *
 *               There is no warranty on this software either expressed or implied.  This program
 *                is released under the terms and conditions of GNU General Public License.
 *
 * comments:  macro to encode character variables to numeric using
 *             user-defined formats to store the character string.
 *           - the format name will be the variable name so any format already
 *              existing with the same name will be over written.
 *           - variables with names that end in numbers are not valid format
 *              names so these formats will be changed to end with an underscore
 *           - sort order of data set will be maintained thus if data set is known
 *              to be sorted by one or more character variables those variables
 *              will not be converted.
 *              
 *
 *  WARNING: This changes your data set by changing some/all character variables
 *            to numeric variables.  Make a back-up copy of your data set
 *            before runing CHAR2FMT.     
 *
 ************************************************************************************
 * Options:
 ************************************************************************************
 * dset= enter name of data set (no libref) in the WORK library in which variables are to be encoded.
 *
 * maxlen= set to what maximum length the character variable can be and not
 *          be converted.  Not setting maxlen will convert all character variables.
 *
 * temp_dir= enter directory name you would like have the file _&tfns_longcharvar.sas
 *            created in if you want to see the code generated by CHAR2FMT,
 *            otherwise the work directory will be used and _&tfns_longcharvar.sas
 *            will be deleted when your SAS session is over.
 *
 * varlist= list of variables to be encoded to numeric variables. NOTE: maxlen= option
 *           will overide this if both are specfied and variables specified in varlist
 *           are not longer than what is specified in maxlen.
 *
 * tfns= is for a file name of your choice.  If not set, it will be based on
 *        the SAS &SYSJOBID and &SYSINDEX macro variables.
 *
 ************************************************************************************/

%macro char2fmt(dset= , maxlen= ,  temp_dir= , varlist= , tfns= );

 %** local macro vars used by char2fmt:  *;
 %local csortedby hasvars i isysver maxfilewidth maxvarlength notes obs savastata_err toolong
        temp_dir vallen varlist;


 %** Save option settings so they can be restored at the end of this macro. **;
 %let notes=%sysfunc(getoption(notes));

 %let obs=%sysfunc(getoption(obs));

 options obs=MAX;   %*** Reason for maximizing it is because user could have        **;
                     %*   set it lower than the number of variables in the dataset. **;
 options nonotes;   %*** Shut off notes while program is running in order to reduce log size. **;

 %** make sysver an integer so it can be properly evaluated  **;
 %let isysver = %sysevalf(&sysver.,integer);
 %if &isysver >=9 %then %do;
  options noquotelenmax ;;
 %end;


 proc contents data=work.&dset. 
    %if &varlist. ^=  %then %do;
      (keep= &varlist. )
    %end;
    out=_conten2 noprint;
 run;

 %let toolong=0; 
 %let maxfilewidth=32300; %** as wide as I am willing to make file **;
 %let maxvarlength=32200; %** this needs to be a little smaller than maxfilewidth **; 

 %let hasvars=0;  %** initialize hasvars **;
  data _conten2;
   set _conten2;
   where type=2 
     %if "&maxlen." ^= ""  %then %do;
      and length > &maxlen.  
     %end;
    ;;;
   call symput('hasvars',_n_);
  run;
 
 
  %** if hasvars then do rest of program  **;
 %if (&hasvars. > 0) %then %do;
  
  %if "%nrbquote(&temp_dir)" = ""  %then %let temp_dir = %sysfunc(pathname(work));
  %if "&tfns" = ""  %then %let tfns= &sysjobid.&sysindex.;

  %let csortedby =;
   data _null_;
    dsid=open("work.&dset","i");
    csortedby=lowcase(attrc(dsid,"SORTEDBY"));
    call symput("csortedby",csortedby);
    rc=close(dsid);
   run;

  %if %length(&csortedby.) = 0  %then %do;
    data work.&dset.;
     set work.&dset.;
      ___ob___ = _n_;  %** create var to preserve sort order **;
    run;
    %let csortedby =___ob___;
  %end;


  ** create formats from long character data **;
  data _null_;
    length label $300;  %** increase length for single quotes normal max length is 256 for a var label **;
   set _conten2 ;
    where lowcase(name) not in(%sortvars(&csortedby.)); 
      %** end of where statement,  do not want to convert vars needed for sort order *;
   file "%nrbquote(&temp_dir.)/_&tfns._longcharvars.sas" ls=&maxfilewidth;
   name=left(name);
   if length(name)> &maxvarlength. then do;
     name=substr(name,1,&maxvarlength.);
     call symput('toolong',1);
   end;
   put " ";
   put " proc sort data=&dset;";
   put "  by " name ";";
   put " run;";
   put " ";
   put " data _conten2;";
   put "  set &dset.(keep= " name ");";
   put "  by " name ";";
   put "  if first." name ";";
   put " run;";
   put " ";
   put " data _conten2;";
   put "  set _conten2 (where=(" name "^=''));";

   invalidf=0;
   %let vallen=32;
   %if &isysver. < 9 %then %let vallen=8;
   %** check that variable name does not end in a number and if so then add "_" to name **;
   %do i=0 %to 9;
    if  substr(compress(name),length(compress(name)),1)="&i." or lowcase(name) in(%invalid_formats) then do;
     invalidf=1;
     if length(name) < &vallen. then 
        name_f = compress(name || "_");  %** add an underscore and period **; 
     else if length(name) = &vallen. or lowcase(name) in(%invalid_formats) then 
        %** overwrite last character with underscore and add period **;
        name_f = compress(substr(compress(name),1,length(compress(name))-1) || "_"); 
    end;
   %end;
   if invalidf=1 then do;
      put "  fmtname=compress(""" name_f """);";
      name_p = compress(name_f || ".");
   end;
   else do; %** if no change required **;
      put "  fmtname=compress(""" name """);";
      name_p = compress(name || ".");
   end;
   
   put "  start=_n_;";
   put "  ___st___=_n_;";
   put "  end=_n_;";
   put "  label=" name ";";  %** SAS versions 8+ have the same max length for vars and formats *;
   put " run;";
   put " ";

   put " proc format library=work cntlin=_conten2(keep=fmtname start end label);";
   put " run;";
   put " ";
   put " data &dset.(drop= " name " rename=(___st___=" name "));";
   put "  merge &dset.  _conten2(keep=" name " ___st___);";
   put "  by " name ";";
   if index(label,"'") then label=tranwrd(label,"'","''");  %** subinstr single quote with two single quotes **;
   put "  label ___st___ = '" label "';";
   put " run;";
   put " ";
   put " data &dset.;";
   put "  set &dset.;";
   

   put "  format " name " " name_p ";";
   put " run;";
   put " ";
  run;

  %include"&temp_dir./_&tfns._longcharvars.sas";
 
  %** return data to original sort oder **;
  proc sort data=&dset.;
   by &csortedby.;
  run;
  %if "&csortedby" = "___ob___" %then %do;
   data &dset;
    set &dset (drop=___ob___);
   run;
  %end;

 %end;  %** of if hasvars to process **;

 %if &toolong=1 %then %do;
    %put WARNING: CHAR2FMT truncated at least one character variable because it was longer than &maxvarlength.  *; 
 %end;
 %goto done;

 %fail1:;
    %put ERROR: CHAR2FMT did not run because you have a least one character variable that is longer than &maxvarlength.  *; 
    %let savastata_err=1;
 %goto done;

 %done:;
 proc datasets nodetails nolist nowarn;
  delete _conten2;
 run;
 quit;

 %let syslast=&dset.;

 options obs=&obs. &notes.;  %** Restore options. **;


%mend char2fmt;


%macro invalid_formats;
   "best"     , "binary"   , "comma"    , "commax"   , "d"        , "date"     , "datetime"
   "dateampm" , "day"      , "ddmmyy"   , "dollar"   , "dollarx"  , "downame"  , "e"       
   "eurdfdd"  , "eurdfde"  , "eurdfdn"  , "eurdfdt"  , "eurdfdwn" , "eurdfmn"  , "eurdfmy" 
   "eurdfwdx" , "eurdfwkx" , "float"    , "fract"    , "hex"      , "hhmm"     , "hour"    
   "ib"       , "ibr"      , "ieee"     , "julday"   , "julian"   , "percent"  , "minguo"  
   "mmddyy"   , "mmss"     , "mmyy"     , "monname"  , "month"    , "monyy"    , "negparen"
   "nengo"    , "numx"     , "octal"    , "pd"       , "pdjulg"   , "pdjuli"   , "pib"     
   "pibr"     , "pk"       , "pvalue"   , "qtr"      , "qtrr"     , "rb"       , "roman"   
   "s370ff"   , "s370fib"  , "s370fibu" , "s370fpd"  , "s370fpdu" , "s370fpib" , "s370frb" 
   "s370fzd"  , "s370fzdl" , "s370fzds" , "s370fzdt" , "s370fzdu" , "ssn"      , "time"    
   "timeampm" , "tod"      , "weekdate" , "weekdatx" , "weekday"  , "worddate" , "worddatx"
   "wordf"    , "words"    , "year"     , "yen"      , "yymm"     , "yymmdd"   , "yymon"   
   "yyq"      , "yyqr"     , "z"        , "zd"       , "f"


%mend invalid_formats;

%macro sortvars (varlist) ;
   %* local macro vars used by sortvars: *;
   %local i j;
         %let i=1;
         %do %while(%length(%scan(%cmpres(&varlist.),&i.,%str( ))) GT 0); 
               %local var&i.;
               %let var&i.= %scan(%cmpres(&varlist.),&i.,%str( ));
               %let i=%eval(&i. + 1);
         %end;
         %do j=1 %to %eval(&i.-1);
                  %sysfunc(compress(%str(%")&&var&j..%str(%")))
         %end;
%mend sortvars ;

